CISC3024 Pattern Recognition Final Project¶

Group Members:¶

  • Huang Yanzhen, DC126732
  • Mai Jiajun, DC127853

0. Project Setup¶

0.1 Packages & Device¶

In [239]:
# Torch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from tqdm import tqdm

# Augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2
import scipy.io as sio

# Visualize Result
from sklearn.metrics import (confusion_matrix, accuracy_score,
                            precision_score, recall_score,
                            f1_score, roc_auc_score,
                            roc_curve, auc, precision_recall_curve,
                            average_precision_score)
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import label_binarize

# Basic
import numpy as np
import cv2
import os
import time
from typing import List, Tuple, Union, Dict, OrderedDict as TypingOrderedDict, Optional, Any
from collections import OrderedDict
import random
import itertools
import copy
In [2]:
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device_name}")
Using device: cuda

0.2 Global Configurations¶

In [3]:
path_dataset = "./data/SVHN_mat"

1. Data Processing and Augmentation¶

1.1 Download Datasets¶

Define dataset class, retrieve dataset.

1.1 Notes¶

In [ ]:
_dat = sio.loadmat(os.path.join(path_dataset, "train_32x32.mat"))
# _dat['X'][0][0][0]
# np.array(_dat).shape
dat = np.transpose(_dat['X'], (3, 0, 1, 2))
# dat = dat.astype(np.float32)
dat.shape
In [ ]:
# First Image
dat[0].shape
In [ ]:
# First Row of Image
dat[0][0]
In [ ]:
# First Pixel of Image
dat[0][0][0]
In [ ]:
_transform = A.Compose([
    A.Normalize(mean=[0.4376845359802246, 0.4437684714794159, 0.47280389070510864], std=[0.19803018867969513, 0.2010156661272049, 0.19703581929206848]),
    ToTensorV2()
])

_img = dat[0]
_img = _transform(image=_img)['image']
# print(_img)
_img.shape

1.2 Dataset¶

In [227]:
class SVHNDataset(Dataset):
    def __init__(self, mat_file, transform=None):
        data = sio.loadmat(mat_file)
        
        self.images = np.transpose(data['X'], (3, 0, 1, 2))
        self.labels = data['y'].flatten()
        self.labels[self.labels == 10] = 0
        self.transform = transform        # Allow postponed injection of transform.

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        # There should always be a transform. 
        # It converts image to float, and permutes it from (32, 32, 3) to Tensor([3, 32, 32]).
        # ...which is important!!
        if self.transform is None:        
            raise ValueError("CISC3024 Custom Error: The transform should not be None when this object is passed into a DataLoader.")

        image = self.transform(image=image)['image']
        return image, label
    
    def get_meanstd(self, contrast_factor=None, random_seed=114514):
        if contrast_factor is not None:
            random.seed(random_seed)
            random_cf_arr = [random.uniform(1 / contrast_factor, contrast_factor) for _ in range(len(self.images))]
            # random_cf = random.uniform(1 / contrast_factor, contrast_factor)
            images_ = []
            for i in range(len(self.images)):
                image = self.images[i]
                image = np.clip(image * random_cf_arr[i], 0, 255)
                image = image.astype(np.uint8)
                images_.append(image)
            images_ = np.array(images_)
        else:
            images_ = self.images
                
        images_ = images_ / 255.0
        mean = np.mean(images_, axis=(0,1,2))
        std = np.std(images_, axis=(0,1,2), ddof=0)
        
        return mean.tolist(), std.tolist()        
    
    def overwrite(self, indices:Union[list, np.ndarray]):
        """
        Create a deep copy of the mother dataset instance and only keep the wanted
        data samples, controlled by indices.
        """
        if any(index < 0 or index >= len(self.labels) for index in indices):
            raise IndexError("CISC3024 Custom Error: One or more indices are out of bounds.")
        
        new_dataset = copy.deepcopy(self)
        new_dataset.images = self.images[indices]
        new_dataset.labels = self.labels[indices]
        return new_dataset

1.2 Peak A Data¶

In [5]:
def peek(dataset, index=None):
    def unnormalize(img, mean, std):
        """Revert the normalization for visualization."""
        img = img * std + mean
        return np.clip(img, 0, 1)

    mean, std = dataset.get_meanstd()

    # Plotting multiple images in a grid
    grid_rows, grid_cols = 1, 6
    
    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(6, 6))

    peek_index = random.randint(0, dataset.__len__()-1) if index is None else index
    
    for i in range(grid_cols):
        img_tensor, label = dataset.__getitem__(peek_index)
        img = img_tensor.permute(1, 2, 0).numpy()  # Convert to (H, W, C)
        img = unnormalize(img, mean, std)
    
        ax = axes[i]  # Get subplot axis
        ax.imshow(img)
        ax.set_title(f"Label: {label}")
    
    plt.tight_layout()
    plt.show()
    print(f"Peeking data from training set of index {peek_index}.\nImage Tnesor Size:{dataset.__getitem__(peek_index)[0].shape}")

2. Neural Network¶

2.1 Model Structure¶

In [52]:
class SmallVGG(nn.Module):
    def __init__(self, frame_size=32):
        super(SmallVGG, self).__init__()
        self.frame_size = frame_size
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 16x16

            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 8x8

            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 4x4
        )
    
        self.fc_layers = nn.Sequential(
            nn.Linear(frame_size * 4 * 4, 256),
            nn.ReLU(),
            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x

2.2 Train and Evaluate Function¶

In [7]:
def train_and_evaluate(model,
                      train_loader,
                      valid_loader,
                      criterion,
                      optimizer,
                      num_epochs=100,
                      stop_early_params=None):
    # Record Losses to plot
    train_losses = []
    valid_losses = []

    # Early stop params
    current_optimized_model = None
    current_min_valid_loss = np.inf
    num_overfit_epochs = 0

    for epoch in range(num_epochs):
        # Train
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * len(images)
        train_losses.append(running_loss / len(train_loader))

        # Evaluate
        model.eval()
        valid_loss = 0.0
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                valid_loss += loss.item() * len(images)

        valid_losses.append(valid_loss / len(valid_loader))
        print(f"Epoch[{epoch+1}/{num_epochs}], Train Loss:{train_losses[-1]:.4f}, Validation Loss:{valid_losses[-1]:.4f}")

        # Early Stop?
        if stop_early_params is None:
            continue
    
        if current_min_valid_loss - stop_early_params["min_delta"] > valid_losses[-1]: # Validation loss decreases
            current_min_valid_loss = valid_losses[-1]
            current_optimized_model = copy.deepcopy(model)
            num_overfit_epochs = (num_overfit_epochs - 1) if num_overfit_epochs > 0 else 0
        else: # Validation loss increases
            num_overfit_epochs += 1

        if num_overfit_epochs > stop_early_params["patience"]:
            print(f"Early stopping at epoch {epoch+1}.")
            model = current_optimized_model
            break

    return train_losses, valid_losses

2.3 Get Predictions¶

Multiple functions are defined to evaluate data. Below is a list of them.

In [8]:
def get_predictions(model_path, extra_loader):
    if not isinstance(model_path, str):
        model_state = model_path
    else:
        model_state = torch.load(model_path)
    model = SmallVGG()
    model.load_state_dict(model_state)
    
    model.to(device)
    model.eval() 
    
    pred_scores = []  # Prob. of predictions
    true_labels = []  # Ground Truth
    pred_labels = []  # Label of prediction, i.e., argmax(softmax(pred_scores))
    
    with torch.no_grad():
        for images, labels in tqdm(extra_loader):
            images, labels = images.to(device), labels.to(device)
        
            outputs = model(images)
    
            pred_scores_batch = nn.functional.softmax(outputs, dim=-1)
    
            pred_scores.extend(pred_scores_batch.cpu().tolist())
            pred_labels.extend(outputs.argmax(dim=1).tolist())
            true_labels.extend(labels.cpu().tolist())
            
    return pred_scores, true_labels, pred_labels

2.4 Get Metrics¶

In [9]:
def get_metrics(true_labels, pred_labels):
    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, zero_division=1, average=None, labels=range(0,10))
    recall = recall_score(true_labels, pred_labels, zero_division=1, average=None, labels=range(0,10))
    f1 = f1_score(true_labels, pred_labels, zero_division=0, average=None, labels=range(0,10))

    return accuracy, precision, recall, f1
In [10]:
def print_metrics(accuracies, f1s):
    print(f"Accuracies:")
    for acc in accuracies:
        print(f"{acc:.3f}", end=" ")
    print("\n")
    
    print(f"F1 Score Lists:")
    mean_f1s = []
    for f1 in f1s:
        for val in f1:
            print(f"{val:.3f}", end=" ")
        mean_f1 = np.mean(f1)
        std_f1 = np.std(f1)
        mean_f1s.append(mean_f1)
        print(f"| Avg F1={mean_f1:.3f}, Std F1={std_f1}")
    print(f"Best: {np.argmax(mean_f1s)+1}-th")
In [12]:
# Compute ROC AUC for each class
def get_roc_auc(true_labels_bin, pred_labels_bin):
    roc_auc = dict()
    for i in range(0, 10):
        roc_auc[i] = roc_auc_score(true_labels_bin[:,i], np.array(pred_scores)[:, i])
    return roc_auc

3. Experiments¶

3.0 Preparation¶

3.0.1 Plot Functions¶

The experiments will be a list of the following structures:

{
    "HYPER_PARAM_1": combo[0],
    "HYPER_PARAM_2": combo[1],
    "train_losses": train_losses,
    "valid_losses": valid_losses,
    "model_state_dict": exp_model.state_dict()
}

Epoch-Loss Curves¶

In [11]:
def plot_el(loaded_experiments, hyper_param_names, n_rows=4, n_cols=4):
    fig_size = (n_cols * 5, n_rows * 5)
    n1, n2 = hyper_param_names
    
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=fig_size)
    # plt.tight_layout()
    
    for i, ax in enumerate(axes.flat):
        train_losses, valid_losses = loaded_experiments[i]["train_losses"], loaded_experiments[i]["valid_losses"]
        
        ax.plot(train_losses, label=f"TRL, min={np.min(train_losses):.3f}")
        ax.plot(valid_losses, label=f"VAL, min={np.min(valid_losses):.3f} at step={np.argmin(valid_losses)}")
        ax.set_xlabel("Epochs")
        ax.set_ylabel("Loss")
        ax.set_title(f"{n1}={loaded_experiments[i][n1]}, {n2}={loaded_experiments[i][n2]}")
        ax.legend(loc="upper right")
    
    plt.show()

Get Experiment Results¶

In [12]:
def get_experiment_results(loaded_experiments, test_hyperparam_names, extra_loader):
    experiment_results = []
    n1, n2 = test_hyperparam_names
    for i, exp in enumerate(loaded_experiments):
        pred_scores, true_labels, pred_labels = get_predictions(exp['model_state_dict'], extra_loader)
        experiment_results.append({
            n1: exp[n1],
            n2: exp[n2],
            "true_labels": true_labels,
            "pred_labels": pred_labels,
            "pred_scores": pred_scores
        })

        print(f"First 10 true labels:")
        [print(num, end=" ") for num in true_labels[:10]]
        print(f"...\n")

        print(f"First 10 pred labels:")
        [print(num, end=" ") for num in pred_labels[:10]]
        print(f"...\n")

        print(f"First 5 pred_scores:")
        [print(num, end=" ") for num in pred_scores[:5]]
        print(f"...\n")

        # del pred_scores, true_labels, pred_lables
        torch.cuda.empty_cache()
    return experiment_results

Confusion Matrix¶

In [13]:
def plot_cm(experiment_results, hyper_param_names, n_rows=4, n_cols=4):
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()

    hparam_1, hparam_2 = hyper_param_names
    
    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_labels = exp_rs['true_labels'], exp_rs['pred_labels']
        cm = confusion_matrix(true_labels, pred_labels)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=range(0,10))
        disp.plot(ax=axes[i], cmap = plt.cm.Blues)
        axes[i].set_title(f"Exp {i+1}: {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
    
    plt.tight_layout()
    plt.show()

Precision-Recall Curve¶

In [14]:
def plot_pr(experiment_results, hyper_param_names, n_rows=4, n_cols=4):
    fig, axes = plt.subplots(n_rows,n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()

    hparam_1, hparam_2 = hyper_param_names
    accuracies = []
    f1_scores = []
    
    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_labels, pred_scores = exp_rs['true_labels'], exp_rs['pred_labels'], exp_rs['pred_scores']
        true_labels_bin, pred_labels_bin = label_binarize(true_labels, classes=range(0,10)), label_binarize(pred_labels, classes=range(0,10))
        
        accuracy, precision, recall, f1 = get_metrics(true_labels, pred_labels)
        accuracies.append(accuracy)
        f1_scores.append(f1)
        
        for j in range(0, 10):
            # print(f"Class {j}: Prec:{precision[j]:.2f}, Recall:{recall[j]:.2f}, F_1 Score:{f1[j]:.2f}")
            precision_i, recall_i, _ = precision_recall_curve(true_labels_bin[:, j], np.array(pred_scores)[:, j])
    
            average_precision = average_precision_score(true_labels_bin[:, j], np.array(pred_scores)[:, j])
            axes[i].step(recall_i, precision_i, where="post", label=f"Class {j} AP={average_precision:.2f}")
            axes[i].set_title(f"PR-Curve {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
        axes[i].legend()
        axes[i].set_xlabel("Recall")
        axes[i].set_ylabel("Precision")
    
    # for j in range(i+1, 16):
    #     fig.delaxes(axes[j])
    
    plt.tight_layout()
    plt.show()
    return accuracies, f1_scores

ROC-AUC Curve¶

In [15]:
def plot_rocauc(experiment_results, hyper_param_names, curve_type, n_rows=4, n_cols=4):
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()

    hparam_1, hparam_2 = hyper_param_names

    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_scores = exp_rs['true_labels'], exp_rs['pred_scores']
        true_labels_bin = label_binarize(true_labels, classes=range(0, 10))

        # All Classes' ROC curve & ROC Area Under Curve
        fpr = dict()
        tpr = dict()
        roc_auc = dict()

        for j in range(10):
            fpr[j], tpr[j], _ = roc_curve(true_labels_bin[:, j], np.array(pred_scores)[:, j])
            roc_auc[j] = auc(fpr[j], tpr[j])

        # Macro-Average ROC & ROC-AUC
        all_fpr = np.unique(np.concatenate([fpr[j] for j in range(10)]))
        mean_tpr = np.zeros_like(all_fpr)
        for j in range(10):
            mean_tpr += np.interp(all_fpr, fpr[j], tpr[j])
        mean_tpr /= 10

        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(true_labels_bin.ravel(), np.array(pred_scores).ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

        # Plot only Macro or Micro ROC curves
        if curve_type == "macro_micro":
            axes[i].plot(fpr["macro"], tpr["macro"], label=f"Macro (AUC={roc_auc['macro']:.2f})")
            axes[i].plot(fpr["micro"], tpr["micro"], label=f"Micro (AUC={roc_auc['micro']:.2f})")
        elif curve_type == "all":
            # Plot all ROC curves
            for j in range(10):
                axes[i].plot(fpr[j], tpr[j], label=f"Class {j} (AUC={roc_auc[j]:.2f})")

        axes[i].plot([0, 1], [0, 1], "k--")
        axes[i].set_xlabel("False Positive Rate")
        axes[i].set_ylabel("True Positive Rate")
        axes[i].set_title(f"ROC Curve {i+1}, {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
        axes[i].legend(loc='lower right')

    plt.tight_layout()
    plt.show()

3.0.1 Datasets¶

In [16]:
def split_train_valid(train_dataset, train_ratio):
    ori_len = len(train_dataset)
    train_size = int(train_ratio * ori_len)
    valid_size = ori_len - train_size

    # These are subsets!! Don't directly use them or you will spend 2 hours solving for it.
    train_subset, valid_subset = random_split(train_dataset, [train_size, valid_size])

    # Re-construct two SVHNDataset object from indecies    
    train_dataset_ = train_dataset.overwrite(indices=train_subset.indices)
    valid_dataset_ = train_dataset.overwrite(indices=valid_subset.indices)
    
    return train_dataset_, valid_dataset_

3.1 Experiment 1: Optimizer¶

In [38]:
# Universal Train Dataset without splitting
exp1_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"))

# Train-Validation Split
exp1_train_dataset, exp1_valid_dataset = split_train_valid(exp1_universal_train_dataset, train_ratio=0.8)

if not isinstance(exp1_train_dataset, SVHNDataset) or not isinstance(exp1_valid_dataset, SVHNDataset):
    raise TypeError("CISC3024 Custom Error: The dataset should be an instance of SVHNDataset.")

# Normalize
exp1_mean, exp1_std = exp1_train_dataset.get_meanstd()
exp1_hyperparams = {
    "num_epochs": 25,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp1_mean, std=exp1_std),
        ToTensorV2()
    ])
}

# Inject Transform
exp1_train_dataset.transform = exp1_hyperparams['transform']
exp1_valid_dataset.transform = exp1_hyperparams['transform']

# Test Dataset
exp1_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "test_32x32.mat"), transform=exp1_hyperparams['transform']) 

del exp1_universal_train_dataset

print(f"Training Size:{exp1_train_dataset.__len__()}, Validation Size:{exp1_valid_dataset.__len__()}")
print(f"Channel Means: {exp1_mean}\nChannel Stds: {exp1_std}")
Training Size:58605, Validation Size:14652
Channel Means: [0.43772128224372864, 0.44378969073295593, 0.4728474020957947]
Channel Stds: [0.19793079793453217, 0.20086902379989624, 0.1968136429786682]

Define changing & non-changing hyper parameters.

In [32]:
exp1_models = [SmallVGG().to(device) for _ in range(0,6)]

candidate_optimizers = [
    optim.Adam(exp1_models[0].parameters(), lr=exp1_hyperparams['lr']), 
    optim.SGD(exp1_models[1].parameters(), lr=exp1_hyperparams['lr'], momentum=0.9),
    optim.RMSprop(exp1_models[2].parameters(), lr=exp1_hyperparams['lr']),
    optim.AdamW(exp1_models[3].parameters(), lr=exp1_hyperparams['lr'], weight_decay=0.01),
    optim.Adagrad(exp1_models[4].parameters(), lr=exp1_hyperparams['lr']),
    optim.SGD(exp1_models[5].parameters(), lr=exp1_hyperparams['lr'], momentum=0.9, nesterov=True)]

for model in exp1_models:
    print(id(model), end=", ")
3044296310112, 3043927082032, 3043927083520, 3043927084144, 3043927085008, 3044137279152, 

Train, Validation and Test datasets.

Train, Validation and Test Data Loaders.

In [40]:
# Data Loaders
exp1_train_loader = DataLoader(exp1_train_dataset, batch_size=128, shuffle=True)
exp1_valid_loader = DataLoader(exp1_valid_dataset, batch_size=128, shuffle=True)
exp1_test_loader = DataLoader(exp1_test_dataset, batch_size=128, shuffle=False)

Run Experiments

In [35]:
def run_exp1(optimizers, models, hyper_params, train_loader, valid_loader):
    experiments = []
    for i, [optimizer, exp1_model] in enumerate(zip(optimizers, models)):
        print(f"Experiment {i+1}. Running experiment on optimizer: {optimizer.__class__.__name__}")

        criterion = hyper_params['criterion']
        num_epochs = hyper_params['num_epochs']
        train_losses, valid_losses = train_and_evaluate(exp1_model, train_loader, valid_loader, criterion, optimizer, num_epochs)

        experiments.append({
            "optimizer": optimizer.__class__.__name__,
            "others":"same",
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": exp1_model.state_dict()
        })

        del exp1_model, criterion, optimizer
        torch.cuda.empty_cache()

    return experiments
In [ ]:
exp1 = run_exp1(candidate_optimizers, exp1_models, exp1_hyperparams, exp1_train_loader, exp1_valid_loader)
time_str = str(time.time()).replace(".","")
torch.save(exp1, f"./models/exp1_{time_str}.pth")

Load Experiments

In [ ]:
exp1_loaded = torch.load("./models/exp1_17305518422052872.pth")
exp1_results = get_experiment_results(exp1_loaded, test_hyperparam_names=["optimizer", "others"], extra_loader=exp1_test_loader)
In [42]:
plot_el(exp1_loaded, ["optimizer", "others"], n_rows=1, n_cols=6)
No description has been provided for this image
In [43]:
plot_cm(exp1_results, ["optimizer", "others"], n_rows=1, n_cols=6)
No description has been provided for this image
In [44]:
exp1_accuracies, exp1_f1s = plot_pr(exp1_results, ["optimizer", "others"], n_rows=1, n_cols=6)
print_metrics(exp1_accuracies, exp1_f1s)
No description has been provided for this image
Accuracies:
0.907 0.196 0.906 0.901 0.735 0.196 

F1 Score Lists:
0.912 0.935 0.938 0.874 0.921 0.905 0.881 0.906 0.847 0.867 | Avg F1=0.899, Std F1=0.028664446684648247
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.888 0.933 0.939 0.876 0.913 0.903 0.878 0.923 0.850 0.865 | Avg F1=0.897, Std F1=0.028470034992916605
0.887 0.935 0.936 0.871 0.914 0.907 0.871 0.908 0.830 0.843 | Avg F1=0.890, Std F1=0.03434799781092271
0.723 0.840 0.802 0.638 0.776 0.700 0.647 0.789 0.515 0.628 | Avg F1=0.706, Std F1=0.09492988307421246
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
Best: 1-th
In [45]:
plot_rocauc(exp1_results, ["optimizer", "others"], curve_type="all", n_rows=1, n_cols=6)
No description has been provided for this image
In [46]:
plot_rocauc(exp1_results, ["optimizer", "others"], curve_type="macro_micro", n_rows=1, n_cols=6)
No description has been provided for this image

3.2 Experiment 2: Training¶

3.2.1 Experiment 2-1: Rough Search in Epoch Number & Learning Rate¶

In this sub-experiment, we perform a rough search on the epochs and learning rate. We promoted four possible values for both parameters: $$ \text{candidate epochs}=\{10, 15, 20, 25\} $$ $$ \text{candidate lr}=\{1.0\times 10^{-3},1.0\times 10^{-4},1.0\times 10^{-5},1.0\times 10^{-6}\} $$

In [337]:
# Universal Train Dataset without splitting
exp2_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"))

# Train & Validation Datasets
exp2_train_dataset, exp2_valid_dataset = split_train_valid(exp2_universal_train_dataset, train_ratio=0.8)
del exp2_universal_train_dataset # Unload the mill and kill the donkey

# Normalize
exp2_mean, exp2_std = exp2_train_dataset.get_meanstd()

exp2_hyperparams = {
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp2_mean, std=exp2_std),
        ToTensorV2()
    ]),
    "optimizer":optim.Adam,
}

exp2_train_dataset.transform = exp2_hyperparams['transform']
exp2_valid_dataset.transform = exp2_hyperparams['transform']

# Test Dataset
exp2_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"test_32x32.mat"), transform=exp2_hyperparams['transform'])

print(f"Training Size:{exp2_train_dataset.__len__()}, Validation Size:{exp2_valid_dataset.__len__()}")
print(f"Channel Means: {exp2_mean}\nChannel Stds: {exp2_std}")
Training Size:58605, Validation Size:14652
Channel Means: [0.4378254993341946, 0.4438759110538289, 0.4728877990066393]
Channel Stds: [0.1979372781387122, 0.20097342894752787, 0.1970604050118251]
In [60]:
candidate_epochs = [10, 15, 20, 25]
candidate_lr = [1e-3, 1e-4, 1e-5, 1e-6]
In [61]:
exp2_train_loader = DataLoader(exp2_train_dataset, batch_size=128, shuffle=True)
exp2_valid_loader = DataLoader(exp2_valid_dataset, batch_size=128, shuffle=True)
exp2_test_loader = DataLoader(exp2_test_dataset, batch_size=128, shuffle=False)
In [62]:
def run_exp2_1(epochs, lr_list, hyper_params, train_loader, valid_loader):
    combinations = list(itertools.product(epochs, lr_list))
    experiments = []
    for i, combo in enumerate(combinations):
        num_epochs, lr = combo

        print(f"Running Exp {i+1}: num_epoch={num_epochs}, lr={lr}")
        this_model = SmallVGG().to(device)
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)
        train_losses, valid_losses = train_and_evaluate(this_model, train_loader, valid_loader, criterion, optimizer, num_epochs)

        experiments.append({
            "num_epochs": num_epochs,
            "lr": lr,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
    return experiments
In [ ]:
exp2_1 = run_exp2_1(candidate_epochs, candidate_lr, exp2_hyperparams, exp2_train_loader, exp2_valid_loader)
time_str = str(time.time()).replace(".","")
torch.save(exp2_1, f"./models/exp2-1_{time_str}.pth")
In [ ]:
exp2_1_loaded = torch.load("./models/exp2-1_17305539358378615.pth")
exp2_1_results = get_experiment_results(exp2_1_loaded, test_hyperparam_names=["num_epochs", "lr"], extra_loader=exp2_test_loader)

3.2.1-1 Epoch-Loss Curve¶

We found that the key to the training performance of a model is the learning rate. Epoch number only controls the progress of training.

From the perspective of learning rate (each column), only the learning rate of $1.0\times 10^{-3}$ shows a sign of convergence under each candidate epochs. With this learning rate, the model even overfitted under experiments with an epoch number over $15$. The best model we conclude from this rough selection is the one with the combination of $\text{num\_epoch}=10\land\text{lr}=1.0\times10^{-3}$. The minimum validation loss is $36.648$ at step $7$, which is the lowest of all $16$ samples. However, this doesn't mean that it is optimal since it may jump over a local minimum.

Moreover, as we inspect the performance on smaller learning rates, we found that they tend to converge in a way further epoch steps. Moreover, for the learning rate $1.0\times 10^{-6}$, the learning rate is too low that the model can not even fit under nearly-finite epochs.

In [65]:
plot_el(exp2_1_loaded, ["num_epochs", "lr"], n_rows=4, n_cols=4)
No description has been provided for this image

3.2.1-2 Confusion Matrix¶

In this rough search, the confusion matrix varies on different learning rates, and tends to be identical on different epochs.

Under a same epoch number, as leraning rate gets smaller, the confusion matrix gets "blurrer", meaning that the prediction is less accurate from the whole perspective. The learning rates under $1.0\times 10^{-5}$ are too low that the model can't converge in a reasonably number of epochs. For the lowest learning rate of $1.0\times 10^{-6}$, the model is not fitted at all. It classifies every number into 1, the number with the richest amount in the dataset.

In [71]:
plot_cm(exp2_1_results, ["num_epochs", "lr"], n_rows=4, n_cols=4)
No description has been provided for this image

3.2.1-3 Precision-Recall Curve¶

From a numerical perspective over the testing performance, the combination of $\text{num\_epoch}=15\land\text{lr}=1.0\times10^{-3}$ gives the highest accuracy of $0.907$, highest average $F_1$ score of $0.916$ and the lowest $F_1$ variance per-class of $0.025$.

In [72]:
exp2_1_accuracies, exp2_1_f1s = plot_pr(exp2_1_results, ["num_epochs", "lr"], n_rows=4, n_cols=4)
print_metrics(exp2_1_accuracies, exp2_1_f1s)
No description has been provided for this image
Accuracies:
0.917 0.825 0.336 0.196 0.912 0.865 0.471 0.196 0.907 0.878 0.663 0.196 0.908 0.880 0.713 0.196 

F1 Score Lists:
0.918 0.942 0.947 0.889 0.925 0.913 0.900 0.931 0.855 0.868 | Avg F1=0.909, Std F1=0.029051096513454423
0.811 0.904 0.880 0.774 0.838 0.791 0.745 0.860 0.703 0.731 | Avg F1=0.804, Std F1=0.06335319369501424
0.040 0.570 0.378 0.189 0.273 0.087 0.002 0.237 0.026 0.004 | Avg F1=0.180, Std F1=0.17824165219527116
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.914 0.934 0.945 0.882 0.932 0.896 0.878 0.919 0.863 0.875 | Avg F1=0.904, Std F1=0.02715514894076909
0.866 0.930 0.912 0.808 0.881 0.829 0.809 0.899 0.794 0.769 | Avg F1=0.850, Std F1=0.05240811428774516
0.154 0.743 0.509 0.321 0.550 0.430 0.122 0.550 0.005 0.080 | Avg F1=0.347, Std F1=0.23453887474256918
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.916 0.926 0.937 0.875 0.922 0.905 0.879 0.919 0.858 0.863 | Avg F1=0.900, Std F1=0.027185167062975517
0.875 0.932 0.913 0.830 0.890 0.849 0.841 0.904 0.810 0.815 | Avg F1=0.866, Std F1=0.04073441033554439
0.596 0.770 0.749 0.632 0.679 0.614 0.618 0.699 0.294 0.526 | Avg F1=0.618, Std F1=0.1283615011641036
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.904 0.943 0.926 0.881 0.914 0.913 0.876 0.921 0.863 0.850 | Avg F1=0.899, Std F1=0.02860358424237646
0.879 0.924 0.925 0.821 0.886 0.867 0.832 0.905 0.820 0.820 | Avg F1=0.868, Std F1=0.040548747485917386
0.689 0.842 0.773 0.646 0.711 0.672 0.640 0.763 0.460 0.549 | Avg F1=0.674, Std F1=0.10523037482252583
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
Best: 1-th
In [73]:
plot_rocauc(exp2_1_results, ["num_epochs", "lr"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
In [74]:
plot_rocauc(exp2_1_results, ["num_epochs", "lr"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image

3.2.2 Experiment 2-2: Detailed Search in Epoch Number & Learning Rate¶

Previous sub-experiment tells that the best combination from all the listed ones is $\text{num\_epoch}=15 \land \text{lr}=1.0\times 10^{-3}$.

This is a rough solution, as it may jump over local minimums. We want to find a better learning rate around $1.0\times 10^{-3}$, with an even more detailed distinction between candidate values, so that it may reveal a missing local minimum without using too much epochs.

We conducted an excessive experiment, purposely seeking an overfitting point over the listed candidate learning rates. We do this by setting the epoch number to $50$.

In [338]:
exp2_2_hyperparams = {
    "num_epoch": 15,
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp2_mean, std=exp2_std),
        ToTensorV2()
    ]),
    "optimizer":optim.Adam,
}

# More detailed candidate learning rates around 1e-3, that is 10e-4.
exp2_2_candidate_lr = np.geomspace(1e-4, 5e-3, 8)
print(exp2_2_candidate_lr)
[0.0001     0.00017487 0.00030579 0.00053472 0.00093506 0.00163512
 0.0028593  0.005     ]
In [339]:
def run_exp2_2(lr_list, hyper_params, train_loader, test_loader):
    experiments = []
    for i, lr in enumerate(lr_list):

        print(f"Running Exp {i+1}: lr={lr}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epoch']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)
        train_losses, valid_losses = train_and_evaluate(this_model, train_loader, test_loader, criterion, optimizer, num_epochs)

        experiments.append({
            "num_epochs": num_epochs,
            "lr": lr,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
    return experiments
In [ ]:
exp2_2 = run_exp2_2(exp2_2_candidate_lr, exp2_2_hyperparams, exp2_train_loader, exp2_valid_loader)
time_str = str(time.time()).replace(".", "")
torch.save(exp2_2, f"./models/exp2-2_{time_str}.pth")
In [ ]:
exp2_2_loaded = torch.load("./models/exp2-2_17305693866477516.pth")
exp2_2_results = get_experiment_results(exp2_2_loaded, test_hyperparam_names=["num_epochs", "lr"], extra_loader=exp2_test_loader)

3.2.2-1 Epoch-Loss Curve¶

For timing issues, an epoch of $15$ is our tolerance line. By inspecting the epoch-loss curve, we found that all the experiments are nearly overfitted around the end of the epochs.

From all the detailed searches, the learning rate of 9.35e-4, yields the lowest validation loss of $38.227$ at step $7$, which is the overfitting point. This minimum is worse than the one produced by the learning rate 1e-3, which is $36.549$. Therefore, for the trade-off of time and performance, we choose the combination of epoch=$15$ and lr=$0.001$ for the following experiments.

In [108]:
plot_el(exp2_2_loaded, ["num_epochs", "lr"], n_rows=2, n_cols=4)
No description has been provided for this image

3.2.2-2 Confusion Matrix¶

At a glance, from the perspective of confusion matrix, the testing performance on unknown data is roughly identical.

In [113]:
plot_cm(exp2_2_results, ["num_epochs", "lr"], n_rows=2, n_cols=4)
No description has been provided for this image

3.2.2-3 Precision-Recall Curve¶

By inspecting the evaluation metrics, we found our judgement correct. From all the over-fitted model, the model with learning rate of $7.0\times 10^{-4}$ yields the highest accuracy of $0.909$ and the highest average per-class $F_1$ score of $0.903$. Besides, the per-class $F_1$ score is also less variant under the learning rate of $7.0\times 10^{-4}$, with the standard deviation of $0.023$.

In [114]:
exp2_2_accuracies, exp2_2_f1s = plot_pr(exp2_2_results, ["num_epochs", "lr"], n_rows=2, n_cols=4)
print_metrics(exp2_2_accuracies, exp2_2_f1s)
No description has been provided for this image
Accuracies:
0.873 0.885 0.905 0.906 0.912 0.903 0.893 0.882 

F1 Score Lists:
0.873 0.929 0.912 0.822 0.888 0.851 0.829 0.901 0.795 0.798 | Avg F1=0.860, Std F1=0.04542290722465954
0.892 0.936 0.916 0.850 0.905 0.854 0.843 0.906 0.819 0.808 | Avg F1=0.873, Std F1=0.04146430928239567
0.908 0.942 0.941 0.874 0.921 0.888 0.857 0.924 0.840 0.839 | Avg F1=0.893, Std F1=0.03748405274950539
0.911 0.942 0.941 0.868 0.923 0.889 0.875 0.917 0.868 0.829 | Avg F1=0.896, Std F1=0.03485676831682514
0.913 0.936 0.943 0.887 0.928 0.912 0.875 0.926 0.852 0.875 | Avg F1=0.905, Std F1=0.029020749926052154
0.901 0.932 0.937 0.880 0.908 0.912 0.873 0.918 0.835 0.841 | Avg F1=0.894, Std F1=0.033859473636702976
0.887 0.926 0.934 0.862 0.900 0.882 0.854 0.915 0.829 0.836 | Avg F1=0.883, Std F1=0.035070141262265965
0.873 0.921 0.919 0.859 0.888 0.880 0.832 0.891 0.814 0.827 | Avg F1=0.870, Std F1=0.03530344586873121
Best: 5-th

3.2.2-4 ROC-AUC Curve¶

The ROC-AUC Curve under all the detailed candidate learning rates are roughly identical.

In [116]:
plot_rocauc(exp2_2_results, ["num_epochs", "lr"], curve_type="all", n_rows=2, n_cols=4)
No description has been provided for this image
In [117]:
plot_rocauc(exp2_2_results, ["num_epochs", "lr"], curve_type="macro_micro", n_rows=2, n_cols=4)
No description has been provided for this image

3.2.3 Experiment 2-3: Batch Size¶

In [350]:
exp2_3_hyperparams = {
    "num_epoch": 100,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.Adam,
}

candidate_batch_sizes = [16, 32, 64, 128]
In [372]:
def run_exp2_3(batch_sizes, hyper_params, train_dataset, valid_dataset):
    experiments = []
    for i, b_size in enumerate(batch_sizes):
        print(f"Running Exp {i+1}: batch_size={b_size}")
        train_loader = DataLoader(train_dataset, batch_size=b_size, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=b_size, shuffle=True)

        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epoch']
        criterion = hyper_params['criterion']
        lr = hyper_params['lr']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)
        train_losses, valid_losses = train_and_evaluate(this_model, 
                                                        train_loader, 
                                                        valid_loader, 
                                                        criterion, 
                                                        optimizer, 
                                                        num_epochs,
                                                        stop_early_params={
                                                            "min_delta": 0.01,
                                                            "patience": 5
                                                        })

        experiments.append({
            "batch_size": b_size,
            "others": "same",
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
    return experiments
In [ ]:
exp2_3 = run_exp2_3(candidate_batch_sizes, exp2_3_hyperparams, exp2_train_dataset, exp2_valid_dataset) 
time_str = str(time.time()).replace(".", "")
torch.save(exp2_3, f"./models/exp2-3_{time_str}.pth")
In [383]:
def exp2_3_get_experiment_results(loaded_experiments, test_hyperparam_names, extra_dataset):
    experiment_results = []
    n1, n2 = test_hyperparam_names
    for i, [exp, extra_loader] in enumerate(zip(loaded_experiments, extra_dataset)):
        extra_loader = DataLoader(extra_dataset, batch_size=exp['batch_size'], shuffle=False)
        pred_scores, true_labels, pred_labels = get_predictions(exp["model_state_dict"], extra_loader)
        experiment_results.append({
            n1: exp[n1],
            n2: exp[n2],
            "true_labels": true_labels,
            "pred_labels": pred_labels,
            "pred_scores": pred_scores
        })
        torch.cuda.empty_cache()
    return experiment_results
In [ ]:
exp2_3_loaded = torch.load("./models/exp2-3_17308762912663589.pth")
exp2_3_results = exp2_3_get_experiment_results(exp2_3_loaded, test_hyperparam_names=["batch_size", "others"], extra_dataset=exp2_test_dataset)
In [385]:
plot_el(exp2_3_loaded, ["batch_size", "others"], n_rows=1, n_cols=4)
No description has been provided for this image
In [386]:
plot_cm(exp2_3_results, ["batch_size", "others"], n_rows=1, n_cols=4)
No description has been provided for this image
In [389]:
exp2_3_accuracies, exp2_3_f1s = plot_pr(exp2_3_results, ["batch_size", "others"], n_rows=1, n_cols=4)
print_metrics(exp2_3_accuracies, exp2_3_f1s)
No description has been provided for this image
Accuracies:
0.911 0.914 0.911 0.911 

F1 Score Lists:
0.907 0.936 0.938 0.876 0.931 0.913 0.885 0.916 0.857 0.876 | Avg F1=0.904, Std F1=0.02698171524438727
0.914 0.937 0.939 0.891 0.924 0.924 0.897 0.913 0.871 0.862 | Avg F1=0.907, Std F1=0.024938019187106995
0.906 0.937 0.944 0.882 0.923 0.917 0.879 0.923 0.858 0.849 | Avg F1=0.902, Std F1=0.031413271954473965
0.913 0.942 0.943 0.868 0.929 0.911 0.895 0.922 0.852 0.850 | Avg F1=0.902, Std F1=0.03326424565964047
Best: 2-th
In [390]:
plot_rocauc(exp2_3_results, ["batch_size", "others"], curve_type="all", n_rows=1, n_cols=4)
No description has been provided for this image
In [391]:
plot_rocauc(exp2_3_results, ["batch_size", "others"], curve_type="macro_micro", n_rows=1, n_cols=4)
No description has been provided for this image

3.3 Experiment 3: Image Augmentation Parameters¶

3.3.1 Experiment 3-1: Rotation Angles and Crop Percentages¶

In [228]:
exp3_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "train_32x32.mat"))

exp3_train_dataset, exp3_valid_dataset = split_train_valid(exp3_universal_train_dataset, train_ratio=0.8)
del exp3_universal_train_dataset

# The mean & std here will only be used for experiment 3-1.
exp3_1_mean, exp3_1_std = exp3_train_dataset.get_meanstd()

exp3_1_hyperparams = {
    "num_epochs": 50,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.Adam,
    "transform": A.Compose([
        A.Normalize(mean=exp3_1_mean, std=exp3_1_std),
        ToTensorV2()
    ])
}

# Transform of train dataset will be altered in the experiments.
exp3_valid_dataset.transform = exp3_1_hyperparams['transform']
exp3_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "test_32x32.mat"), transform=exp3_1_hyperparams["transform"])

print(f"Training Size:{exp3_train_dataset.__len__()}, Validation Size:{exp3_valid_dataset.__len__()}")
print(f"Channel Means: {exp3_1_mean}\nChannel Stds: {exp3_1_std}")
Training Size:58605, Validation Size:14652
Channel Means: [0.43757779153461307, 0.443731916543914, 0.47288011100561006]
Channel Stds: [0.19818649325776583, 0.20113878491802037, 0.1971410629011666]
In [61]:
# Group 1
candidate_angles = [15, 30, 45, 60]
candidate_crops = [0.08, 0.24, 0.40, 0.60] # Left Boundary
In [206]:
exp3_valid_loader = DataLoader(exp3_valid_dataset, batch_size=128, shuffle=False)
exp3_test_loader = DataLoader(exp3_test_dataset, batch_size=128, shuffle=False)
In [63]:
def run_exp3_1(angles, crops, hyper_params, train_dataset, valid_loader):
    combinations = list(itertools.product(angles, crops))
    experiments = []
    for i, combo in enumerate(combinations):
        angle, crop = combo
        
        print(f"Running Exp {i+1}: angles={angle}, crop={crop}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epochs']
        lr = hyper_params['lr']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)

        # Define Transform
        this_transform = A.Compose([
            A.RandomResizedCrop(32, 32, scale=(crop, 1.0)),
            A.Rotate(limit=angle),
            A.Normalize(mean=exp3_1_mean, std=exp3_1_std),
            ToTensorV2()
        ])

        # Generate Dataset
        print(f"Exp {i+1}: Generating dataset from transform")
        train_dataset.transform = this_transform
        
        train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

        # Train Model
        train_losses, valid_losses = train_and_evaluate(this_model, 
                                                       train_loader, 
                                                       valid_loader, 
                                                       criterion, 
                                                       optimizer,
                                                       num_epochs,
                                                       stop_early_params={
                                                           "min_delta": 0.01,
                                                           "patience": 5
                                                       })

        experiments.append({
            "angle": angle,
            "crop": crop,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
        
        torch.cuda.empty_cache()

    return experiments
In [ ]:
exp3_1 = run_exp3_1(candidate_angles, candidate_crops, exp3_1_hyperparams, exp3_train_dataset, exp3_valid_loader)
time_str = str(time.time()).replace(".","")
torch.save(exp3_1, f"./models/exp3-1_{time_str}.pth")
In [ ]:
exp3_1_loaded = torch.load("./models/exp3-1_17307349208257582.pth")
exp3_1_results = get_experiment_results(exp3_1_loaded, test_hyperparam_names=["angle", "crop"], extra_loader=exp3_test_loader)
In [68]:
plot_el(exp3_1_loaded, ["angle", "crop"], n_rows=4, n_cols=4)
No description has been provided for this image
In [69]:
plot_cm(exp3_1_results, ["angle", "crop"], n_rows=4, n_cols=4)
No description has been provided for this image
In [70]:
exp3_accuracies, exp3_f1s = plot_pr(exp3_1_results, ["angle", "crop"], n_rows=4, n_cols=4)
print_metrics(exp3_accuracies, exp3_f1s)
No description has been provided for this image
Accuracies:
0.909 0.902 0.915 0.926 0.901 0.908 0.920 0.927 0.894 0.889 0.906 0.929 0.894 0.892 0.909 0.918 

F1 Score Lists:
0.918 0.936 0.937 0.867 0.934 0.907 0.888 0.901 0.858 0.873 | Avg F1=0.902, Std F1=0.028146078028585732
0.902 0.930 0.923 0.860 0.913 0.913 0.885 0.911 0.857 0.863 | Avg F1=0.896, Std F1=0.02593225413077912
0.913 0.933 0.949 0.905 0.929 0.924 0.878 0.897 0.837 0.891 | Avg F1=0.906, Std F1=0.030528486926295145
0.918 0.951 0.948 0.904 0.937 0.925 0.908 0.938 0.885 0.874 | Avg F1=0.919, Std F1=0.024707654109350188
0.875 0.927 0.926 0.881 0.923 0.910 0.886 0.893 0.842 0.858 | Avg F1=0.892, Std F1=0.027922326916596706
0.893 0.935 0.927 0.879 0.926 0.921 0.888 0.905 0.873 0.863 | Avg F1=0.901, Std F1=0.024139022374309215
0.903 0.948 0.940 0.891 0.938 0.927 0.904 0.918 0.870 0.880 | Avg F1=0.912, Std F1=0.025248490682396124
0.921 0.947 0.947 0.909 0.944 0.927 0.918 0.924 0.885 0.883 | Avg F1=0.920, Std F1=0.02202325661064801
0.895 0.935 0.917 0.862 0.918 0.904 0.878 0.905 0.811 0.803 | Avg F1=0.883, Std F1=0.04265224570562562
0.899 0.927 0.921 0.837 0.911 0.885 0.877 0.883 0.839 0.830 | Avg F1=0.881, Std F1=0.033704954984753885
0.875 0.929 0.944 0.879 0.935 0.910 0.882 0.914 0.836 0.864 | Avg F1=0.897, Std F1=0.03311069193894167
0.903 0.946 0.956 0.912 0.944 0.936 0.912 0.925 0.891 0.897 | Avg F1=0.922, Std F1=0.02147996881433538
0.881 0.923 0.919 0.850 0.918 0.893 0.874 0.889 0.863 0.856 | Avg F1=0.887, Std F1=0.025284746933263623
0.867 0.925 0.922 0.848 0.915 0.898 0.865 0.885 0.849 0.861 | Avg F1=0.884, Std F1=0.02832748402850103
0.889 0.939 0.933 0.880 0.929 0.914 0.884 0.915 0.860 0.866 | Avg F1=0.901, Std F1=0.027214294349668898
0.910 0.946 0.946 0.891 0.919 0.919 0.894 0.931 0.884 0.860 | Avg F1=0.910, Std F1=0.026501694953201833
Best: 12-th
In [71]:
plot_rocauc(exp3_1_results, ["angle", "crop"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
In [72]:
plot_rocauc(exp3_1_results, ["angle", "crop"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image

3.3.2 Experiment 3-2: Aspect Ratios & Contrast Factors¶

In [285]:
exp3_2_hyperparams = {
    "num_epochs": 50,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.Adam,
    "crop":0.6,
    "angle":45,
}

class ContrastEnhanceTransform:
    def __init__(self, factor: Union[float, Tuple[float, float]]) -> None:
        if isinstance(factor, tuple):
            self.factor_min = factor[0]
            self.factor_max = factor[1]
        else:
            self.factor_min = 1 / factor
            self.factor_max = factor

    def __call__(self, img: np.ndarray) -> np.ndarray:
        _dtype = img.dtype
        contrast_factor = random.uniform(self.factor_min, self.factor_max)
        img = np.clip(img * contrast_factor, 0, 255)  # apply contrast enhancement
        return img.astype(_dtype)

# Group 2
candidate_ratios = [0.25, 0.42, 0.58, 0.75]
candidate_contrast_factors = [1.2, 1.4, 1.6, 1.8]

Control candidates for different variables

In [286]:
def run_exp3_2(ratios, contrast_factors, hyper_params, train_dataset, valid_dataset):
    combinations = list(itertools.product(ratios, contrast_factors))
    experiments = []
    for i, combo in enumerate(combinations):
        ratio, cf = combo
        
        print(f"Running Exp {i+1}: ratio={ratio}, contrast_factor={cf}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epochs']
        lr = hyper_params['lr']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)

        # Define Transform
        this_mean, this_std = train_dataset.get_meanstd(contrast_factor=cf)
        this_train_transform = A.Compose([
            A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(cf)(img)),  # Lambda customized transform block
            A.RandomResizedCrop(32, 32, scale=(hyper_params['crop'], 1.0), ratio=(ratio, 1.0 / ratio)),
            A.Rotate(limit=hyper_params['angle']),
            A.Normalize(mean=this_mean, std=this_std),
            ToTensorV2()
        ])

        this_valid_transform = A.Compose([
            A.Normalize(mean=this_mean, std=this_std),
            ToTensorV2()
        ])

        # Generate Dataset
        print(f"Exp {i+1}: Generating dataset from transform")
        train_dataset.transform = this_train_transform
        valid_dataset.transform = this_valid_transform
        
        train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)

        # Train Model
        train_losses, valid_losses = train_and_evaluate(this_model, 
                                                       train_loader, 
                                                       valid_loader, 
                                                       criterion, 
                                                       optimizer,
                                                       num_epochs,
                                                       stop_early_params={
                                                           "min_delta": 0.01,
                                                           "patience": 5
                                                       })

        experiments.append({
            "ratio": ratio,
            "contrast_factor": cf,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer
        # del train_loader, valid_loader
        
        torch.cuda.empty_cache()

    return experiments
In [ ]:
exp3_2 = run_exp3_2(candidate_ratios, candidate_contrast_factors, exp3_2_hyperparams, exp3_train_dataset, exp3_valid_dataset)
time_str = str(time.time()).replace(".", "")
torch.save(exp3_2, f"./models/exp3-2_{time_str}.pth")
In [288]:
def exp3_2_get_experiment_results(loaded_experiments, test_hyperparam_names, extra_loaders):
    experiment_results = []
    n1, n2 = test_hyperparam_names
    for i, [exp, extra_loader] in enumerate(zip(loaded_experiments, extra_loaders)):
        pred_scores, true_labels, pred_labels = get_predictions(exp["model_state_dict"], extra_loader)
        experiment_results.append({
            n1: exp[n1],
            n2: exp[n2],
            "true_labels": true_labels,
            "pred_labels": pred_labels,
            "pred_scores": pred_scores
        })

        print(f"First 10 true labels: {true_labels[:10]}")
        print(f"First 10 pred labels: {pred_labels[:10]}")
        print(f"First 5 pred_scores: {pred_scores[:5]}")

        torch.cuda.empty_cache()
    return experiment_results
In [289]:
means_stds = [exp3_valid_dataset.get_meanstd(contrast_factor=cf) for cf in candidate_contrast_factors]
_exp3_2_test_loaders = []
for mean_std in means_stds:
    this_mean, this_std = mean_std
    this_transform = A.Compose([
        A.Normalize(mean=this_mean, std=this_std),
        ToTensorV2()
    ])
    this_test_dataset = copy.deepcopy(exp3_valid_dataset)
    this_test_dataset.transform = this_transform
    this_loader = DataLoader(this_test_dataset, batch_size=128, shuffle=False)
    _exp3_2_test_loaders.append(this_loader)

exp3_2_test_loaders = []
for _ in candidate_ratios:
    exp3_2_test_loaders += _exp3_2_test_loaders
In [ ]:
exp3_2_loaded = torch.load("./models/exp3-2_1730819819555844.pth")
exp3_2_results = exp3_2_get_experiment_results(exp3_2_loaded, test_hyperparam_names=["ratio", "contrast_factor"], extra_loaders=exp3_2_test_loaders)
In [292]:
plot_el(exp3_2_loaded, ["ratio", "contrast_factor"], n_rows=4, n_cols=4)
No description has been provided for this image
In [293]:
plot_cm(exp3_2_results, ["ratio", "contrast_factor"], n_rows=4, n_cols=4)
No description has been provided for this image
In [294]:
exp3_2_accuracies, exp3_2_f1s = plot_pr(exp3_2_results, ["ratio", "contrast_factor"], n_rows=4, n_cols=4)
print_metrics(exp3_2_accuracies, exp3_2_f1s)
No description has been provided for this image
Accuracies:
0.920 0.918 0.917 0.917 0.918 0.911 0.913 0.922 0.914 0.916 0.913 0.913 0.923 0.923 0.917 0.922 

F1 Score Lists:
0.894 0.937 0.947 0.909 0.939 0.912 0.896 0.926 0.880 0.912 | Avg F1=0.915, Std F1=0.02067976693819648
0.908 0.934 0.939 0.906 0.934 0.915 0.901 0.925 0.869 0.904 | Avg F1=0.913, Std F1=0.019974054656940087
0.925 0.939 0.929 0.904 0.940 0.916 0.908 0.916 0.852 0.902 | Avg F1=0.913, Std F1=0.02390683851734753
0.890 0.934 0.946 0.898 0.935 0.904 0.907 0.935 0.883 0.894 | Avg F1=0.912, Std F1=0.021508105940274098
0.912 0.936 0.946 0.912 0.930 0.908 0.887 0.922 0.873 0.910 | Avg F1=0.914, Std F1=0.0205745618350115
0.904 0.927 0.939 0.890 0.937 0.900 0.897 0.904 0.866 0.900 | Avg F1=0.907, Std F1=0.02124979354773411
0.912 0.932 0.936 0.888 0.930 0.904 0.889 0.926 0.872 0.898 | Avg F1=0.909, Std F1=0.02080427179136081
0.923 0.937 0.947 0.907 0.930 0.922 0.898 0.922 0.883 0.912 | Avg F1=0.918, Std F1=0.017568940244675372
0.904 0.933 0.936 0.904 0.932 0.908 0.906 0.905 0.872 0.890 | Avg F1=0.909, Std F1=0.019079743671399765
0.919 0.932 0.938 0.901 0.925 0.913 0.903 0.924 0.860 0.908 | Avg F1=0.912, Std F1=0.021048150026752813
0.907 0.933 0.946 0.899 0.938 0.890 0.899 0.920 0.862 0.886 | Avg F1=0.908, Std F1=0.024824526478269996
0.912 0.931 0.944 0.901 0.924 0.894 0.882 0.927 0.864 0.905 | Avg F1=0.908, Std F1=0.0230442781174334
0.920 0.939 0.945 0.910 0.921 0.915 0.906 0.928 0.889 0.912 | Avg F1=0.919, Std F1=0.015365852898010962
0.929 0.942 0.945 0.912 0.928 0.908 0.907 0.936 0.865 0.911 | Avg F1=0.918, Std F1=0.022282154155352875
0.925 0.933 0.943 0.895 0.935 0.903 0.901 0.924 0.871 0.905 | Avg F1=0.914, Std F1=0.021094009578053304
0.927 0.940 0.944 0.896 0.936 0.912 0.920 0.924 0.879 0.910 | Avg F1=0.919, Std F1=0.01913420145325484
Best: 16-th
In [295]:
plot_rocauc(exp3_2_results, ["ratio", "contrast_factor"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
In [296]:
plot_rocauc(exp3_2_results, ["ratio", "contrast_factor"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image
In [297]:
def exp3_2_peek(dataset, hyper_params, index=21642, ratio=1.0, contrast_factor=0):
    exp3_train_dataset_sample = dataset.overwrite(range(exp3_train_dataset.__len__())) # Deep copy of original
    temp_mean, temp_std = exp3_train_dataset_sample.get_meanstd(contrast_factor=contrast_factor)
    exp3_train_dataset_sample.transform = A.Compose([
                                            A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)),  # Lambda customized transform block
                                            A.RandomResizedCrop(32, 32, scale=(hyper_params["crop"], 1.0), ratio=(ratio, 1.0 / ratio)),
                                            A.Rotate(limit=hyper_params["angle"]),
                                            A.Normalize(mean=temp_mean, std=temp_std),
                                            ToTensorV2()
                                        ])
    peek(exp3_train_dataset_sample, index=index)
    del exp3_train_dataset_sample, temp_mean, temp_std
In [298]:
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=9099, ratio=0.75, contrast_factor=1.2)
D:\Temps\temp\ipykernel_87160\1409744167.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
  A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)),  # Lambda customized transform block
No description has been provided for this image
Peeking data from training set of index 9099.
Image Tnesor Size:torch.Size([3, 32, 32])
In [299]:
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=9099, ratio=0.75, contrast_factor=1.4)
D:\Temps\temp\ipykernel_87160\1409744167.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
  A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)),  # Lambda customized transform block
No description has been provided for this image
Peeking data from training set of index 9099.
Image Tnesor Size:torch.Size([3, 32, 32])
In [300]:
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=9099, ratio=0.75, contrast_factor=1.6)
D:\Temps\temp\ipykernel_87160\1409744167.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
  A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)),  # Lambda customized transform block
No description has been provided for this image
Peeking data from training set of index 9099.
Image Tnesor Size:torch.Size([3, 32, 32])
In [301]:
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=9099, ratio=0.75, contrast_factor=1.8)
D:\Temps\temp\ipykernel_87160\1409744167.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
  A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)),  # Lambda customized transform block
No description has been provided for this image
Peeking data from training set of index 9099.
Image Tnesor Size:torch.Size([3, 32, 32])

3.4 Experiment 4: NN Structure¶

3.4.1 NN Structure¶

In [303]:
class Inception(nn.Module):
    def __init__(self, in_channels: int, ch1x1: int, ch3x3_reduce: int, ch3x3: int,
                 ch5x5_reduce: int, ch5x5: int, pool_proj: int):
        super(Inception, self).__init__()

        # 1x1 conv batch
        self.branch1x1 = nn.Conv2d(in_channels, ch1x1, kernel_size=1)

        # 1x1 -> 3x3 conv batch
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, ch3x3_reduce, kernel_size=1),
            nn.Conv2d(ch3x3_reduce, ch3x3, kernel_size=3, padding=1)
        )

        # 1x1 -> 5x5 conv batch
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, ch5x5_reduce, kernel_size=1),
            nn.Conv2d(ch5x5_reduce, ch5x5, kernel_size=5, padding=2)
        )

        # 3x3 pool -> 1x1 conv batch
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, pool_proj, kernel_size=1)
        )

    def forward(self, x):
        branch1x1 = self.branch1x1(x)
        branch3x3 = self.branch3x3(x)
        branch5x5 = self.branch5x5(x)
        branch_pool = self.branch_pool(x)

        outputs = torch.cat([branch1x1, branch3x3, branch5x5, branch_pool], 1)
        return outputs


candidate_seq: List[Tuple[TypingOrderedDict[str, Optional[nn.Module]], TypingOrderedDict[str, Optional[nn.Module]]]] = [
    (OrderedDict([  # first struct: SmallVGG
        ('conv1', nn.Conv2d(3, 8, kernel_size=3, padding=1)),
        ('*1', None),
        ('conv2', nn.Conv2d(8, 16, kernel_size=3, padding=1)),
        ('*2', None),
        ('max1', nn.MaxPool2d(kernel_size=2, stride=2)),  # 16x16

        ('conv3', nn.Conv2d(16, 32, kernel_size=3, padding=1)),
        ('*3', None),
        ('conv4', nn.Conv2d(32, 48, kernel_size=3, padding=1)),
        ('*4', None),
        ('max2', nn.MaxPool2d(kernel_size=2, stride=2)),  # 8x8

        ('conv5', nn.Conv2d(48, 56, kernel_size=3, padding=1)),
        ('*5', None),
        ('conv6', nn.Conv2d(56, 64, kernel_size=3, padding=1)),
        ('*6', None),
        ('max3', nn.MaxPool2d(kernel_size=2, stride=2))  # 4x4
    ]), OrderedDict([
        ('fc1', nn.Linear(64 * 4 * 4, 512)),
        ('*1', None),
        ('fc2', nn.Linear(512, 256)),
        ('*2', None),
        ('fc3', nn.Linear(256, 10))
    ])),

    (OrderedDict([  # second struct: LeNet-5
        ('conv1', nn.Conv2d(3, 12, kernel_size=5, stride=1, padding=2)),
        ('*1', None),
        ('avg1', nn.AvgPool2d(kernel_size=2, stride=2)),

        ('conv2', nn.Conv2d(12, 32, kernel_size=5)),
        ('*2', None),
        ('avg2', nn.AvgPool2d(kernel_size=2, stride=2)),
    ]), OrderedDict([
        ('fc1', nn.Linear(32 * 6 * 6, 256)),
        ('*3', None),
        ('fc2', nn.Linear(256, 128)),
        ('*4', None),
        ('fc3', nn.Linear(128, 10))
    ])),

    (OrderedDict([  # third struct: 2012AlexNet
        ('conv1', nn.Conv2d(3, 64, kernel_size=5, stride=1, padding=2)),  # 32x32
        ('*1', None),
        ('max1', nn.MaxPool2d(kernel_size=2, stride=2)),  # 16x16

        ('conv2', nn.Conv2d(64, 192, kernel_size=5, padding=2)),
        ('*2', None),
        ('max2', nn.MaxPool2d(kernel_size=2, stride=2)),  # 8x8

        ('conv3', nn.Conv2d(192, 384, kernel_size=3, padding=1)),
        ('*3', None),

        ('conv4', nn.Conv2d(384, 256, kernel_size=3, padding=1)),
        ('*4', None),

        ('conv5', nn.Conv2d(256, 256, kernel_size=3, padding=1)),
        ('*5', None),
        ('max3', nn.MaxPool2d(kernel_size=2, stride=2))  # 4x4
    ]), OrderedDict([
        ('fc1', nn.Linear(256 * 4 * 4, 4096)),  # 256 * 4 * 4 = 4096
        ('*6', None),
        ('dropout1', nn.Dropout()),

        ('fc2', nn.Linear(4096, 4096)),
        ('*7', None),
        ('dropout2', nn.Dropout()),

        ('fc3', nn.Linear(4096, 10))
    ])),

    (OrderedDict([  # fourth struct: 2014GoogLeNet
        ('conv1', nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)),
        ('max1', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),

        ('conv2', nn.Conv2d(64, 64, kernel_size=1)),
        ('conv3', nn.Conv2d(64, 192, kernel_size=3, padding=1)),
        ('max2', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),

        # Inception 模块
        ('inception3a', Inception(192, 64, 96, 128, 16, 32, 32)),
        ('inception3b', Inception(256, 128, 128, 192, 32, 96, 64)),
        ('max3', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),

        ('inception4a', Inception(480, 192, 96, 208, 16, 48, 64)),
        ('inception4b', Inception(512, 160, 112, 224, 24, 64, 64)),
        ('inception4c', Inception(512, 128, 128, 256, 24, 64, 64)),
        ('inception4d', Inception(512, 112, 144, 288, 32, 64, 64)),
        ('inception4e', Inception(528, 256, 160, 320, 32, 128, 128)),
        ('max4', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),

        ('inception5a', Inception(832, 256, 160, 320, 32, 128, 128)),
        ('inception5b', Inception(832, 384, 192, 384, 48, 128, 128)),

        ('avg1', nn.AdaptiveAvgPool2d((1, 1))),
        ('dropout1', nn.Dropout(0.4)),
    ]), OrderedDict([
        ('fc1', nn.Linear(1024, 10))
    ]))
]

def mix_seq_and_act(seq: Tuple[TypingOrderedDict, TypingOrderedDict],
                    activation_func: nn.Module) -> Tuple[nn.Sequential, nn.Sequential]:
    """
    replace all layers whose names start with '*' to the selected activation function
    """
    conv_seq = seq[0].copy()
    for name, module in conv_seq.items():
        if name.startswith('*'):
            conv_seq[name] = activation_func

    fc_seq = seq[1].copy()
    for name, module in fc_seq.items():
        if name.startswith('*'):
            fc_seq[name] = activation_func

    return nn.Sequential(conv_seq), nn.Sequential(fc_seq)

candidate_seq_name = ['SmallVGG', 'LeNet-5', '2012AlexNet', '2014GoogLeNet', ]

candidate_activation_func: List[nn.Module] = [nn.ReLU(), nn.ELU(), nn.LeakyReLU(), nn.SiLU()]
In [304]:
exp4_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "train_32x32.mat"))

exp4_train_dataset, exp4_valid_dataset = split_train_valid(exp4_universal_train_dataset, train_ratio=0.8)

exp4_mean, exp4_std = exp4_train_dataset.get_meanstd(contrast_factor=1.8)

exp4_1_hyperparams = {
    "num_epochs": 100,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.Adam,
    "train_transform": A.Compose([
        A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(1.8)(img)),
        A.RandomResizedCrop(32, 32, scale=(0.6, 1.0), ratio=(0.75, 1.0 / 0.75)),
        A.Rotate(limit=15),
        A.Normalize(mean=exp4_mean, std=exp4_std),
        ToTensorV2()
    ]),
    "valid_transform": A.Compose([
         A.Normalize(mean=exp4_mean, std=exp4_std),
        ToTensorV2()
    ])
}

exp4_train_dataset.transform = exp4_1_hyperparams["train_transform"]
exp4_valid_dataset.transform = exp4_1_hyperparams["valid_transform"]
exp4_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "test_32x32.mat"), transform=exp4_1_hyperparams["valid_transform"])

print(f"Training Size:{exp4_train_dataset.__len__()}, Validation Size:{exp4_valid_dataset.__len__()}")
print(f"Channel Means:{exp4_mean}\nChannel Stds:{exp4_std}")
D:\Temps\temp\ipykernel_87160\2006685800.py:13: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().
  A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(1.8)(img)),
Training Size:58605, Validation Size:14652
Channel Means:[0.5003052918497749, 0.5060428476243829, 0.5373523839665758]
Channel Stds:[0.2596073596983816, 0.2605003081414301, 0.2568807907866938]
In [305]:
exp4_train_loader = DataLoader(exp4_train_dataset, batch_size=128, shuffle=True)
exp4_valid_loader = DataLoader(exp4_valid_dataset, batch_size=128, shuffle=False)
exp4_test_loader = DataLoader(exp4_test_dataset, batch_size=128, shuffle=False)
In [306]:
def run_exp4_1(sequence_with_name: Tuple[List[str], List[Tuple]],
               activations: List, 
               hyper_params: Dict[str, Any],
               train_loader: DataLoader,
               valid_loader: DataLoader) -> List[Dict[str, Union[List[float], dict, float, int]]]:

    combinations = list(itertools.product(sequence_with_name, activations))
    experiments = []

    for i, combo in enumerate(combinations):
        (seq_name, seq), activations = combo

        print(f"Running Exp {i + 1}: shape={seq_name}, activation func={activations.__class__.__name__}")

        this_model = SmallVGG()
        conv, fc = mix_seq_and_act(seq, activations)
        this_model.conv_layers = conv  # new conv_layers
        this_model.fc_layers = fc  # new fc_layers
        this_model = this_model.to(device)

        num_epochs = hyper_params['num_epochs']
        lr = hyper_params['lr']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)
        
        # Train Model
        print(f"Exp {i + 1}: Generating dataset from transform")
        train_losses, valid_losses = train_and_evaluate(this_model,
                                                        train_loader, valid_loader,
                                                        criterion, optimizer, num_epochs,
                                                        stop_early_params={
                                                            "min_delta": 0.01,
                                                            "patience": 5
                                                        })

        experiments.append({
            "shape": seq_name,
            "act_func": activations.__class__.__name__,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })

        del this_model, criterion, optimizer

        torch.cuda.empty_cache()

    return experiments
In [ ]:
exp4_1 = run_exp4_1(zip(candidate_seq_name, candidate_seq), 
                    candidate_activation_func,
                    exp4_1_hyperparams,
                    exp4_train_loader,
                    exp4_valid_loader)
time_str = str(time.time()).replace(".", "")
torch.save(exp4_1, f"./models/exp4-1_{time_str}.pth")
In [324]:
def exp4_1_get_experiment_results(loaded_experiments,
                                   sequence_with_name: Tuple[List[str], List[Tuple]],
                                   activations: List, 
                                   test_hyperparam_names: Dict[str, Any],
                                   extra_loader: DataLoader):
    combinations = list(itertools.product(sequence_with_name, activations))
    experiment_results = []
    n1, n2 = test_hyperparam_names
    
    for i, [combo, exp] in enumerate(zip(combinations, loaded_experiments)):
        (seq_name, seq), activations = combo

        this_model = SmallVGG()
        conv, fc = mix_seq_and_act(seq, activations)
        this_model.conv_layers = conv
        this_model.fc_layers = fc
        this_model.load_state_dict(exp["model_state_dict"])
        this_model = this_model.to(device)

        pred_scores = []
        true_labels = []
        pred_labels = []

        with torch.no_grad():
            for images, labels in tqdm(extra_loader):
                images, labels = images.to(device), labels.to(device)

                outputs = this_model(images)

                pred_scores_batch = nn.functional.softmax(outputs, dim=-1)
                pred_scores.extend(pred_scores_batch.cpu().tolist())
                pred_labels.extend(outputs.argmax(dim=1).tolist())
                true_labels.extend(labels.cpu().tolist())

        experiment_results.append({
            n1: exp[n1],
            n2: exp[n2],
            "true_labels": true_labels,
            "pred_labels": pred_labels,
            "pred_scores": pred_scores
        })

        print(f"First 10 true labels: {true_labels[:10]}")
        print(f"First 10 pred_labels: {pred_labels[:10]}")
        print(f"First 5 pred_scores: {pred_scores[:5]}")

        torch.cuda.empty_cache()
    return experiment_results
In [ ]:
exp4_1_loaded = torch.load("./models/exp4-1_17308362503569772.pth")
exp4_1_results = exp4_1_get_experiment_results(exp4_1_loaded, 
                                        zip(candidate_seq_name, candidate_seq), 
                                        candidate_activation_func, 
                                        test_hyperparam_names=["shape", "act_func"], 
                                        extra_loader=exp4_test_loader)
In [313]:
plot_el(exp4_1_loaded, ["shape", "act_func"], n_rows=4, n_cols=4)
No description has been provided for this image
In [327]:
plot_cm(exp4_1_results, ["shape", "act_func"], n_rows=4, n_cols=4)
No description has been provided for this image
In [330]:
exp4_1_accuracies, exp4_1_f1s = plot_pr(exp4_1_results, ["shape", "act_func"], n_rows=4, n_cols=4)
print_metrics(exp4_1_accuracies, exp4_1_f1s)
No description has been provided for this image
Accuracies:
0.915 0.932 0.924 0.937 0.903 0.901 0.910 0.918 0.196 0.078 0.078 0.196 0.925 0.925 0.925 0.925 

F1 Score Lists:
0.930 0.934 0.935 0.889 0.928 0.917 0.893 0.916 0.867 0.893 | Avg F1=0.910, Std F1=0.02214185752235704
0.924 0.950 0.952 0.909 0.948 0.936 0.912 0.934 0.892 0.909 | Avg F1=0.927, Std F1=0.019710223823189963
0.932 0.942 0.943 0.893 0.941 0.924 0.907 0.924 0.875 0.903 | Avg F1=0.918, Std F1=0.021909506799942538
0.940 0.951 0.957 0.918 0.948 0.937 0.922 0.937 0.904 0.910 | Avg F1=0.932, Std F1=0.01715071048985564
0.894 0.929 0.931 0.873 0.923 0.905 0.873 0.901 0.856 0.870 | Avg F1=0.895, Std F1=0.025416825296858667
0.891 0.922 0.934 0.877 0.908 0.902 0.882 0.905 0.857 0.851 | Avg F1=0.893, Std F1=0.025177077858284696
0.904 0.936 0.932 0.881 0.921 0.911 0.888 0.911 0.867 0.871 | Avg F1=0.902, Std F1=0.023355308505421304
0.908 0.939 0.947 0.883 0.937 0.922 0.901 0.911 0.885 0.873 | Avg F1=0.911, Std F1=0.024231694376209662
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.144 0.000 0.000 | Avg F1=0.014, Std F1=0.0431902452937821
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.144 0.000 0.000 | Avg F1=0.014, Std F1=0.04318562618088482
0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378
0.897 0.955 0.950 0.894 0.953 0.939 0.897 0.931 0.885 0.855 | Avg F1=0.915, Std F1=0.0328550328555584
0.897 0.955 0.950 0.894 0.953 0.939 0.897 0.931 0.885 0.855 | Avg F1=0.915, Std F1=0.0328550328555584
0.897 0.955 0.950 0.894 0.953 0.939 0.897 0.931 0.885 0.855 | Avg F1=0.915, Std F1=0.0328550328555584
0.897 0.955 0.950 0.894 0.953 0.939 0.897 0.931 0.885 0.855 | Avg F1=0.915, Std F1=0.0328550328555584
Best: 4-th
In [333]:
plot_rocauc(exp4_1_results, ["shape", "act_func"], curve_type="all", n_rows=4, n_cols=4)
No description has been provided for this image
In [334]:
plot_rocauc(exp4_1_results, ["shape", "act_func"], curve_type="macro_micro", n_rows=4, n_cols=4)
No description has been provided for this image

4. Select a utility model¶

In [394]:
utiliy_model_state = exp3_2_loaded[15]['model_state_dict']
time_str = str(time.time()).replace(".", "")
torch.save(utiliy_model_state, f"./models/utility_{time_str}.pth")